knitr::opts_chunk$set(echo = TRUE)
Introduction
# Load Packages ----
library(tidyverse)
library(plotly)
library(broom)
library(knitr)
library(kableExtra)
# Load Data ----
afl <- read.csv("/home/gthornton1999/VU/3. Spatiotemporal/Assignment 3/Data/Assessment3_SpatiotemporalDataset2.csv")
# Wrangle ----
#column names
colnames(afl)
## [1] "SampleNumber" "Time_Seconds" "Time_Minutes" "Latitude" "Longitude"
## [6] "Velocity" "Acceleration" "Game" "Action" "Symbol"
## [11] "Location" "Quarter"
# how many unique actions are there?
unique(afl$Action)
## [1] NA "Handball Effective"
## [3] "Handball Clanger" "Kick Short"
## [5] "Handball Ineffective" "Kick Ineffective"
## [7] "Kick Backwards" "Kick Inside 50"
## [9] "Tackle" "Knock On Effective"
## [11] "Spoil" "Ground Kick Ineffective"
## [13] "Kick In Short" "Kick Clanger"
## [15] "Kick Long" "1-on-1 Contest Defender"
## [17] "Smotherer After Disposal" "Spoil Gaining"
## [19] "Kick Long To Advantage"
# what type are eahc variable
str(afl)
## 'data.frame': 336018 obs. of 12 variables:
## $ SampleNumber: int 1 2 3 4 5 6 7 8 9 10 ...
## $ Time_Seconds: num 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1 ...
## $ Time_Minutes: num 0.00167 0.00333 0.005 0.00667 0.00833 ...
## $ Latitude : num -37.8 -37.8 -37.8 -37.8 -37.8 ...
## $ Longitude : num 145 145 145 145 145 ...
## $ Velocity : num 0.17 0.272 0.517 0.857 1.097 ...
## $ Acceleration: num 0.318 0.491 0.981 1.586 1.792 ...
## $ Game : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Action : chr NA NA NA NA ...
## $ Symbol : chr NA NA NA NA ...
## $ Location : chr NA NA NA NA ...
## $ Quarter : int NA NA NA NA NA NA NA NA NA NA ...
# Define Fatigue using "Quarter" as the independent variable.
# bin the quarters into fatigue levels
# assuming relatively linear increase in fatigue
afl$Fatigue_Level <- case_when(
afl$Quarter == 1 ~ "Low", # Quarter 1: Early in the game
afl$Quarter == 2 ~ "Low", # Quarter 2: Still relatively fresh
afl$Quarter == 3 ~ "Moderate", # Quarter 3: Fatigue starting to accumulate
afl$Quarter == 4 ~ "High" # Quarter 4: High fatigue
)
# Define action variables into "effective" and "ineffective"
# Effective actions
effective_actions <- afl %>%
filter(Action %in% c("Handball Effective", "Kick Long To Advantage",
"Knock On Effective", "Spoil Gaining",
"Kick Inside 50", "Kick Long",
"Smotherer After Disposal"))
# Ineffective or Clanger actions
ineffective_actions <- afl %>%
filter(Action %in% c("Handball Clanger", "Kick Clanger",
"Handball Ineffective", "Kick Ineffective",
"Ground Kick Ineffective"))
Regression Analysis
Does fatigue have a impact on skill execution?
# Regression ----
# Does fatigue have a impact on skill execution?
# Step 1: Create a binary skill execution variable on both skill df's
effective_actions <- effective_actions %>%
mutate(EffectiveBinary = 1)
ineffective_actions <- ineffective_actions %>%
mutate(EffectiveBinary = 0)
# Step 2: Combine skill df's into 1 df
afl_skill <- bind_rows(effective_actions, ineffective_actions)
# Step 3: Run a logistic regression
# will analyse whether fatigue level is significantly associated with the probability of executing an effective action
skillvfatigue_model <- glm(EffectiveBinary ~ Fatigue_Level, family = "binomial", data = afl_skill)
# Summarise regression
summary(skillvfatigue_model)
##
## Call:
## glm(formula = EffectiveBinary ~ Fatigue_Level, family = "binomial",
## data = afl_skill)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.4528 -1.3856 0.9250 0.9826 1.0203
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.38137 0.17336 2.200 0.0278 *
## Fatigue_LevelLow 0.09589 0.21397 0.448 0.6540
## Fatigue_LevelModerate 0.24618 0.23324 1.055 0.2912
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 779.51 on 587 degrees of freedom
## Residual deviance: 778.34 on 585 degrees of freedom
## AIC: 784.34
##
## Number of Fisher Scoring iterations: 4
# make neat table using broom package
regression_table <- tidy(skillvfatigue_model)
# add odds ratios
regression_table <- regression_table %>%
mutate(odds_ratio = exp(estimate))
# make it neat with kable
regression_table %>%
select(term, estimate, std.error, odds_ratio, p.value) %>%
mutate(across(where(is.numeric), ~ round(., 3))) %>%
kable(col.names = c("Term", "Estimate", "Std. Error", "Odds Ratio", "P-Value"),
caption = "Logistic Regression: Effect of Fatigue on Skill Execution") %>%
kable_styling(bootstrap_options = c("striped", "condensed"), full_width = FALSE) %>%
row_spec(0, bold = TRUE)
Logistic Regression: Effect of Fatigue on Skill Execution
|
Term
|
Estimate
|
Std. Error
|
Odds Ratio
|
P-Value
|
|
(Intercept)
|
0.381
|
0.173
|
1.464
|
0.028
|
|
Fatigue_LevelLow
|
0.096
|
0.214
|
1.101
|
0.654
|
|
Fatigue_LevelModerate
|
0.246
|
0.233
|
1.279
|
0.291
|
# Tidy the model output
model_summary <- tidy(skillvfatigue_model) %>%
mutate(
Odds_Ratio = exp(estimate), # convert coefficients to odds ratios
estimate = round(estimate, 3),
std.error = round(std.error, 3),
statistic = round(statistic, 3),
p.value = round(p.value, 3),
Odds_Ratio = round(Odds_Ratio, 3)
)
# Rename columns for clarity
colnames(model_summary) <- c("Term", "Coefficient", "Std. Error", "Z value", "P value", "Odds Ratio")
# Create a neat table
kable(model_summary, caption = "Logistic Regression Summary: Effect of Fatigue on Skill Execution") %>%
kable_styling(full_width = FALSE, position = "center", bootstrap_options = c("striped", "hover"))
Logistic Regression Summary: Effect of Fatigue on Skill Execution
|
Term
|
Coefficient
|
Std. Error
|
Z value
|
P value
|
Odds Ratio
|
|
(Intercept)
|
0.381
|
0.173
|
2.200
|
0.028
|
1.464
|
|
Fatigue_LevelLow
|
0.096
|
0.214
|
0.448
|
0.654
|
1.101
|
|
Fatigue_LevelModerate
|
0.246
|
0.233
|
1.055
|
0.291
|
1.279
|
# Step 4: Interpret summary statistics
# Performance under high fatigue (intercept) is statistically significant (P value = 0.028)
# The coefficients of Low and Moderate fatigue levels are positive = the player performs slightly better under less fatigue
# However, the P values are greater than 0.05 = the differences in skill execution and fatigue levels are not statistically significant
# Step 5: Visualise the effect
ggplot(afl_skill, aes(x = Fatigue_Level, fill = as.factor(EffectiveBinary))) +
geom_bar(position = "fill") +
labs(x = "Fatigue Level", y = "Proportion of Actions", title = "Skill Execution by Fatigue Level") +
scale_fill_manual(values = c("0" = "orange", "1" = "purple"),
labels = c("Ineffective", "Effective"),
name = "Skill Execution") +
theme_minimal()

# Shows the most ineffective skills occur when High Fatigue
# The least is under moderate fatigue - why??
# Step 6: Convert coefficients to Odds Ratios
# tells us how the odds of executing an effective action change as fatigue level increases
exp(coef(skillvfatigue_model))
## (Intercept) Fatigue_LevelLow Fatigue_LevelModerate
## 1.464286 1.100639 1.279133
# intercept = 1.46 -> baseline of odds of an effective action when the fatigue level is high
# Fatigue Level Low = 1.10 -> odds of an effective action are 1.10x higher in Low Fatigue than in High
# Fatigue Level Moderate = 1.28 -> odds of an effective skill action are 1.28x higher in Moderate than in High Fatigue
# Step 7: Overall interpretation
# As fatigue increases, the odds of performing an effective action decreases.
# players are more likely to perform well under low or moderate fatigue than high
#
Spatial Distribution of Errors
Using Fatigue levels to assess where skill errors commonly occur
# Spatial Distribution of Errors ----
# Using Fatigue levels to assess where skill errors commonly occur
# Step 1: Classify Actions
# already did this when creating the "ineffective_actions" data frame
# Step 2: Group each individual ineffective action and remove multiple data points for the same action until there is one.
ineffective_actions_single <- ineffective_actions %>%
group_by(Action, Location, Quarter, Game) %>%
slice(1) %>% # Take the first time stamp for that action group
ungroup()
# Step 3: Plot bar chart using Location variables (field zones)
ggplot(ineffective_actions_single, aes(x = Location, fill = Fatigue_Level)) +
geom_bar(position = "dodge") +
labs(
title = "Count of Ineffective Actions by Field Location and Fatigue Level",
x = "Field Zone",
y = "Count of Clangers",
fill = "Fatigue Level"
) +
theme_minimal()

Skill Outcome by Field Location and Fatigue Level
# Comparing effective vs. ineffective actions across field locations and fatigue levels.
# Step 1: Creating df for individual effective actions (same method as Step 2)
effective_actions_single <- effective_actions %>%
group_by(Action, Location, Quarter, Game) %>%
slice(1) %>% # Take the first time stamp for that action group
ungroup()
# Step 2: Create "Skill_Outcome" variable in each df
ineffective_actions_single <- ineffective_actions_single %>%
mutate(Skill_Outcome = "Ineffective")
effective_actions_single <- effective_actions_single %>%
mutate(Skill_Outcome = "Effective")
# Step 3: Combine the 2 df
combined_actions_single <- bind_rows(effective_actions_single, ineffective_actions_single)
# Step 4: Create a data frame for the counts of Skill Outcomes
counts_df <- combined_actions_single %>%
group_by(Location, Skill_Outcome, Fatigue_Level) %>%
summarise(Count = n(), .groups = "drop")
# Step 5: Create ggplot value
spatial_interactive <- ggplot(counts_df, aes(x = Location, y = Count, fill = Skill_Outcome,
text = paste0(
"Location: ", Location, "<br>",
"Skill Outcome: ", Skill_Outcome, "<br>",
"Fatigue Level: ", Fatigue_Level, "<br>",
"Count: ", Count
))) +
geom_col(position = "dodge") +
facet_wrap(~ Fatigue_Level) +
labs(
title = "Skill Outcome by Field Location and Fatigue Level",
x = " ",
y = "Action Count",
fill = "Skill Outcome"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Step 6: Convert to interactive plot
ggplotly(spatial_interactive, tooltip = "text")